from gxl_ai_utils.utils import utils_file

# Merge the per-set SpeechIO manifests (data.list, jsonl) into one combined
# manifest, re-point every wav path at the relocated data directory, and emit
# a key -> transcript table next to it.
output_dir = "/home/A02_tmpdata2/data/test_speechio/speechio_0-5"
# Presumably creates the output directory if it is missing -- confirm against
# utils_file.makedir_sil.
utils_file.makedir_sil(output_dir)

# Prefix the wav paths carry in the source manifests, and the prefix they
# should carry after the rewrite.
old_wav_prefix = '/home/work_nfs8/xlgeng/data/scp_test/speechio_'
new_wav_prefix = '/home/A02_tmpdata2/data/test_speechio/speechio_'

# NOTE(review): range(5) reads sets 0..4 only, while the output directory is
# named "speechio_0-5" -- confirm whether set 5 is meant to be included.
manifest_paths = [
    f'/home/A02_tmpdata2/data/test_speechio/speechio_{set_idx}/data.list'
    for set_idx in range(5)
]

# Concatenate all entries, keeping the per-set order (set 0 first).
new_list = [
    entry
    for manifest_path in manifest_paths
    for entry in utils_file.load_dict_list_from_jsonl(manifest_path)
]

new_text_dict = {}
for entry in new_list:
    # Entries are updated in place, so new_list itself carries the new paths.
    entry['wav'] = entry['wav'].replace(old_wav_prefix, new_wav_prefix)
    # A key that appears more than once keeps the transcript seen last.
    new_text_dict[entry['key']] = entry['txt']

utils_file.write_dict_list_to_jsonl(new_list, f'{output_dir}/data.list')
utils_file.write_dict_to_scp(new_text_dict, f'{output_dir}/text.scp')
